/* For detailed instructions, see README file */


/* Session options: echo source statements and macro resolution to the log,
   give sorts the maximum sort size, and compress the data sets that are written. */
options source macrogen symbolgen mprint sortsize=max compress=yes;


/*=========================================*/
/* Library that holds the input data sets (OP.OPRA_Pseudo, OP.Rule_606) */
libname OP "INSERT PATH";
/* File that receives the SAS log of this program */
%let LOG_PATH=INSERT PATH\Table9.log;
options dlcreatedir;
%put &LOG_PATH;
/*=========================================*/


/* Send the log to &LOG_PATH, replacing any earlier log file */
proc printto log="&LOG_PATH" new;
run;


/* Suppress procedure output while the intermediate tables are built;
   output is switched back on just before the final PROC REPORT. */
ods graphics off;
ods noresults;
ods select none;
footnote " ";
ods title " ";
/*=========================================*/
/*=========================================*/



/*
 * REG_DATA: trade-level regression sample read from OP.OPRA_Pseudo.
 *
 * Adds
 *   YEAR_MONTH - SAS date of the first day of the trade month (format yymmn6.),
 *                used as the monthly join/group key further down;
 *   CHARDATE   - the trade date as a DATE9. string (used as a class / cluster
 *                variable in the regressions).
 *
 * The month key used to be built as input(cats(year,month),yymmn6.). CATS does
 * not zero-pad the month, so January 2020 became the 5-character string "20201",
 * which is not a valid yyyymm value. The key is now computed directly from DATE;
 * for two-digit months the result is identical to the old expression.
 */
DATA REG_DATA;
	SET OP.OPRA_Pseudo;

	/* Leave the key missing (without an invalid-argument note) when DATE is missing */
	IF NOT MISSING(DATE) THEN year_month = INTNX('month', DATE, 0, 'beginning');
	FORMAT year_month yymmn6.;

	CHARDATE = PUT(DATE, date9.);

	/* Each variable is listed once (the old list repeated DATE, TICK_CHANGE_IND
	   and AUCTION_IND). */
	KEEP
		PFOF_IND
		ARBITRAGE
		DMMP_IND
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		UNIQUE_ID
		DATE
		SENDER
		AUCTION_IND
		DMM_IDENTITY
		YEAR_MONTH
		EQ
		PImp_C
		S_VOL
		QS_OPTIONSCALED
		ES_OPTIONSCALED
		EFFECTIVESPREAD_C
		CHARDATE
		SYMBOL_ONLY
	;
RUN;

/* Per DMM and trading day, over auction trades only (AUCTION_IND=1):
   mean EQ, sum of PIMP_C, sum of PIMP_C*S_VOL and the number of rows. */
proc sql;
	create table DAILY_EQ_AUC as
	select
		DMM_IDENTITY,
		mean(EQ)          as DAILY_EQ,
		sum(PIMP_C)       as DAILY_PIMP_C,
		sum(PIMP_C*S_VOL) as DAILY_PIMP_C_2,
		count(*)          as DAILY_AUCTION_FREQ,
		DATE,
		YEAR_MONTH
	from REG_DATA(where=(AUCTION_IND=1))
	group by DMM_IDENTITY, YEAR_MONTH, DATE;
quit;


/* Per DMM and month: the average of each daily statistic in DAILY_EQ_AUC. */
proc sql;
	create table MONTHLY_EQ_AUC as
	select
		DMM_IDENTITY,
		mean(DAILY_EQ)           as MONTHLY_EQ,
		mean(DAILY_PIMP_C)       as MONTHLY_PIMP_C,
		mean(DAILY_PIMP_C_2)     as MONTHLY_PIMP_C_2,
		mean(DAILY_AUCTION_FREQ) as MONTHLY_AUCTION_FREQ,
		YEAR_MONTH
	from DAILY_EQ_AUC
	group by DMM_IDENTITY, YEAR_MONTH;
quit;
 

/* Order the daily table chronologically inside each DMM x month group. */
proc sort data=DAILY_EQ_AUC;
	by DMM_IDENTITY YEAR_MONTH DATE;
run;

/* Add the previous observation's value of every daily statistic (LAG_*).
   The lag is taken within DMM_IDENTITY x YEAR_MONTH, so the first row of
   each group has missing lags. */
proc expand data=DAILY_EQ_AUC out=DAILY_EQ_AUC method=none;
	by DMM_IDENTITY YEAR_MONTH;
	convert DAILY_EQ           = LAG_DAILY_EQ           / method=none transout=(lag 1);
	convert DAILY_PIMP_C       = LAG_DAILY_PIMP_C       / method=none transout=(lag 1);
	convert DAILY_PIMP_C_2     = LAG_DAILY_PIMP_C_2     / method=none transout=(lag 1);
	convert DAILY_AUCTION_FREQ = LAG_DAILY_AUCTION_FREQ / method=none transout=(lag 1);
run;
/* PROC EXPAND below lags within DMM_IDENTITY, so MONTHLY_EQ_AUC has to be in
   DMM_IDENTITY / YEAR_MONTH order. The sort used to be applied to DAILY_EQ_AUC
   (already in that order), leaving the monthly table to rely on whatever row
   order PROC SQL happened to produce. */
PROC SORT DATA=MONTHLY_EQ_AUC; BY DMM_IDENTITY YEAR_MONTH; RUN;

/* Add the previous month's value of every monthly statistic (LAG_*); the first
   month of each DMM has missing lags. */
proc expand data= MONTHLY_EQ_AUC out=MONTHLY_EQ_AUC method=none;
   convert MONTHLY_EQ = LAG_MONTHLY_EQ  /method=none  transout=(LAG 1);
   convert MONTHLY_PIMP_C = LAG_MONTHLY_PIMP_C  /method=none  transout=(LAG 1);
   convert MONTHLY_PIMP_C_2 = LAG_MONTHLY_PIMP_C_2  /method=none  transout=(LAG 1);
   convert MONTHLY_AUCTION_FREQ = LAG_MONTHLY_AUCTION_FREQ /method=none  transout=(LAG 1);

   BY DMM_IDENTITY;
run;



/*
 * Ws_dollar: each DMM's share (in percent) of the month's Total_Contracts in
 * OP.Rule_606, stored in column PCTSUM.
 *
 * This used to be produced with PROC TABULATE ... OUT=. The OUT= data set of
 * PROC TABULATE names the statistic column after the analysis variable and the
 * statistic rather than plain PCTSUM, so the later join, which reads E.PCTSUM,
 * could not find the column. Computing the share in SQL gives the same number
 * (100 * DMM total / month total) under the name the join expects.
 * Rows with a missing DMM_IDENTITY are left out, as PROC TABULATE does for
 * missing class values by default.
 */
PROC SQL;
	CREATE TABLE Ws_dollar AS
	SELECT
		YEAR_MONTH,
		DMM_IDENTITY,
		/* SUM() over the month is re-merged onto each DMM row */
		100 * DMM_CONTRACTS / SUM(DMM_CONTRACTS) AS PCTSUM
	FROM
		(SELECT YEAR_MONTH, DMM_IDENTITY, SUM(Total_Contracts) AS DMM_CONTRACTS
		 FROM OP.Rule_606
		 WHERE NOT MISSING(DMM_IDENTITY)
		 GROUP BY YEAR_MONTH, DMM_IDENTITY)
	GROUP BY YEAR_MONTH
	ORDER BY YEAR_MONTH, DMM_IDENTITY;
QUIT;


/* Attach to every trade in REG_DATA:
     B - lagged daily auction statistics of its DMM (matched on DMM and DATE),
     C - Rule 606 columns of its DMM for the month,
     D - lagged monthly auction statistics of its DMM,
     E - the DMM's percentage share from Ws_dollar.
   The result is ordered by SYMBOL_ONLY for the BY processing that follows. */
proc sql;
	create table REG_DATA_2_FULL as
	select
		A.*,
		B.LAG_DAILY_PIMP_C,
		B.LAG_DAILY_PIMP_C_2,
		B.LAG_DAILY_AUCTION_FREQ,
		B.LAG_DAILY_EQ,
		C.Total_Contracts,
		D.LAG_MONTHLY_EQ,
		D.LAG_MONTHLY_PIMP_C,
		D.LAG_MONTHLY_PIMP_C_2,
		D.LAG_MONTHLY_AUCTION_FREQ,
		C.PCTSUM_CONTRACT,
		E.PCTSUM as PCTSUM_CONTRACTS
	from REG_DATA A
		left join DAILY_EQ_AUC B
			on A.DMM_IDENTITY = B.DMM_IDENTITY and A.DATE = B.DATE
		left join OP.Rule_606 C
			on A.DMM_IDENTITY = C.DMM_IDENTITY and A.YEAR_MONTH = C.YEAR_MONTH
		left join MONTHLY_EQ_AUC D
			on A.DMM_IDENTITY = D.DMM_IDENTITY and A.YEAR_MONTH = D.YEAR_MONTH
		left join Ws_dollar E
			on A.DMM_IDENTITY = E.DMM_IDENTITY and A.YEAR_MONTH = E.YEAR_MONTH
	order by A.SYMBOL_ONLY;
quit;







/*
 * REG_DATA_3_FULL: adds the "below last month's average" indicators.
 *
 *   PIMPR_BELOW   = 1 if yesterday's LAG_DAILY_PIMP_C_2 is below the previous
 *                   month's average, 0 otherwise
 *   AUCTION_BELOW = same for the auction frequency
 *   EQ_BELOW      = same for EQ
 *   *_PFOF_INT_AUC = indicator x PFOF_IND
 *   LOG_CONTR_PUR = log(1 + Total_Contracts)
 *
 * An indicator is missing when either of its two lags is missing. The
 * interactions used to be computed before the indicators were reset to missing,
 * and in SAS a missing value compares as smaller than any number, so rows with
 * a missing daily lag received an interaction of PFOF_IND instead of missing.
 * The indicator is now settled first and the interaction is derived from it.
 *
 * Rows without PCTSUM_CONTRACT (no Rule 606 match) are dropped.
 */
data REG_DATA_3_FULL;
	set REG_DATA_2_FULL;
	BY SYMBOL_ONLY;

	IF MISSING(PCTSUM_CONTRACT) THEN DELETE;

	IF MISSING(LAG_DAILY_PIMP_C_2) OR MISSING(LAG_MONTHLY_PIMP_C_2) THEN PIMPR_BELOW=.;
	ELSE IF LAG_DAILY_PIMP_C_2<LAG_MONTHLY_PIMP_C_2 THEN PIMPR_BELOW=1;
	ELSE PIMPR_BELOW=0;
	PIMPR_BLOW_PFOF_INT_AUC= PIMPR_BELOW*PFOF_IND;

	IF MISSING(LAG_DAILY_AUCTION_FREQ) OR MISSING(LAG_MONTHLY_AUCTION_FREQ) THEN AUCTION_BELOW=.;
	ELSE IF LAG_DAILY_AUCTION_FREQ<LAG_MONTHLY_AUCTION_FREQ THEN AUCTION_BELOW=1;
	ELSE AUCTION_BELOW=0;
	AUCTION_BELOW_PFOF_INT_AUC= AUCTION_BELOW*PFOF_IND;

	IF MISSING(LAG_DAILY_EQ) OR MISSING(LAG_MONTHLY_EQ) THEN EQ_BELOW=.;
	ELSE IF LAG_DAILY_EQ<LAG_MONTHLY_EQ THEN EQ_BELOW=1;
	ELSE EQ_BELOW=0;
	EQ_BELOW_PFOF_INT_AUC= EQ_BELOW*PFOF_IND;

	LOG_CONTR_PUR = LOG(1+Total_Contracts);
RUN;



/* INPUT: the estimation sample. Keeps auction trades only, adds the "above"
   complements of the "below" indicators and rescales the two quoted spreads. */
data INPUT;
	set REG_DATA_3_FULL;

	/* auction trades only */
	if auction_ind ne 1 then delete;

	AUCTION_ABOVE         = 1 - AUCTION_BELOW;
	EQ_ABOVE              = 1 - EQ_BELOW;
	EQ_ABOVE_PFOF_INT_AUC = EQ_ABOVE * PFOF_IND;

	/* Normalize to dollars - see table caption */
	STOCK_QS       = STOCK_QS / 100;
	QUOTEDSPREAD_C = QUOTEDSPREAD_C / 100;
run;





/* INPUT_D1: every model variable minus its mean within SYMBOL_ONLY
   (PROC SQL re-merges the group mean onto each row). */
proc sql;
	create table INPUT_D1 as
	select
		(PFOF_IND            - mean(PFOF_IND))            as PFOF_IND,
		(PCTSUM_CONTRACTS    - mean(PCTSUM_CONTRACTS))    as PCTSUM_CONTRACTS,
		(EQ_ABOVE            - mean(EQ_ABOVE))            as EQ_ABOVE,
		(PIMPR_BELOW         - mean(PIMPR_BELOW))         as PIMPR_BELOW,
		(AUCTION_BELOW       - mean(AUCTION_BELOW))       as AUCTION_BELOW,
		(Abs_Delta           - mean(Abs_Delta))           as Abs_Delta,
		(STOCK_QS            - mean(STOCK_QS))            as STOCK_QS,
		(INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT))  as INV_STOCK_MIDPOINT,
		(QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C))      as QUOTEDSPREAD_C,
		(GAMMA               - mean(GAMMA))               as GAMMA,
		(VEGA                - mean(VEGA))                as VEGA,
		(INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT)) as INV_OPTION_MIDPOINT,
		(CALL_IND            - mean(CALL_IND))            as CALL_IND,
		(LOG_DAYS_EXP        - mean(LOG_DAYS_EXP))        as LOG_DAYS_EXP,
		(LOG_S_VOL           - mean(LOG_S_VOL))           as LOG_S_VOL,
		(TICK_CHANGE_IND     - mean(TICK_CHANGE_IND))     as TICK_CHANGE_IND,
		(LOG_MCAP            - mean(LOG_MCAP))            as LOG_MCAP,
		CHARDATE,
		SYMBOL_ONLY
	from INPUT
	group by SYMBOL_ONLY;
quit;



/* INPUT_D2: the symbol-demeaned variables of INPUT_D1 minus their mean within
   CHARDATE. This is the data set the SURVEYREG fits are run on. */
proc sql;
	create table INPUT_D2 as
	select
		(PFOF_IND            - mean(PFOF_IND))            as PFOF_IND,
		(PCTSUM_CONTRACTS    - mean(PCTSUM_CONTRACTS))    as PCTSUM_CONTRACTS,
		(EQ_ABOVE            - mean(EQ_ABOVE))            as EQ_ABOVE,
		(PIMPR_BELOW         - mean(PIMPR_BELOW))         as PIMPR_BELOW,
		(AUCTION_BELOW       - mean(AUCTION_BELOW))       as AUCTION_BELOW,
		(Abs_Delta           - mean(Abs_Delta))           as Abs_Delta,
		(STOCK_QS            - mean(STOCK_QS))            as STOCK_QS,
		(INV_STOCK_MIDPOINT  - mean(INV_STOCK_MIDPOINT))  as INV_STOCK_MIDPOINT,
		(QUOTEDSPREAD_C      - mean(QUOTEDSPREAD_C))      as QUOTEDSPREAD_C,
		(GAMMA               - mean(GAMMA))               as GAMMA,
		(VEGA                - mean(VEGA))                as VEGA,
		(INV_OPTION_MIDPOINT - mean(INV_OPTION_MIDPOINT)) as INV_OPTION_MIDPOINT,
		(CALL_IND            - mean(CALL_IND))            as CALL_IND,
		(LOG_DAYS_EXP        - mean(LOG_DAYS_EXP))        as LOG_DAYS_EXP,
		(LOG_S_VOL           - mean(LOG_S_VOL))           as LOG_S_VOL,
		(TICK_CHANGE_IND     - mean(TICK_CHANGE_IND))     as TICK_CHANGE_IND,
		(LOG_MCAP            - mean(LOG_MCAP))            as LOG_MCAP,
		CHARDATE,
		SYMBOL_ONLY
	from INPUT_D1
	group by CHARDATE;
quit;


/* INPUT is put in SYMBOL_ONLY order for the ABSORB statement of the PROC GLM
   fits further down. */
PROC SORT DATA=INPUT; BY SYMBOL_ONLY;RUN;

/*
 * Regression 1 (EQ_ABOVE), standard errors and p-values: PROC SURVEYREG on the
 * demeaned data INPUT_D2, no intercept. The parameter table is captured in
 * ParameterEstimates_clus.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table, which would then multiply rows in the joins that assemble
 * the final table. It is now listed once.
 *
 * NOTE(review): with two variables on the CLUSTER statement SURVEYREG forms
 * clusters from the SYMBOL_ONLY x CHARDATE combinations; it does not cluster
 * two-way. Confirm this is the intended clustering.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		EQ_ABOVE
		PCTSUM_CONTRACTS
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;
ODS RESULTS;





/* Standard errors of the SURVEYREG fit as (parameter, value, Type="S") rows.
   Rows whose parameter name contains one of the listed substrings are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type = "S";
	rename StdErr = Value;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;

/* p-values of the same fit as (parameter, probt) rows, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;


/*
 * Regression 1 (EQ_ABOVE), point estimates: PROC GLM on INPUT with SYMBOL_ONLY
 * absorbed and CHARDATE as a class effect, no intercept. Captures the parameter
 * table, the fit statistics (R-square) and the observation counts.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table. It is now listed once.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=INPUT;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model PFOF_IND =
		PCTSUM_CONTRACTS
		EQ_ABOVE
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;




/* R-square of the GLM fit as a single (parameter="Rsq", value, Type="E") row.
   The six subsetting IFs that used to follow tested `parameter`, which had just
   been set to the constant "Rsq", so they could never exclude a row and have
   been removed. */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	rename RSquare=value;

	parameter = "Rsq";
	type = "E";
run;






/* Standard errors and p-values: straight copies of the SURVEYREG extracts. */
data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates of the GLM fit as (parameter, value, Type="E") rows.
   Rows whose parameter name contains one of the listed substrings (this
   includes the CHARDATE class levels) are dropped. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;



/* Keep only the "Number of Observations Read" line of the GLM NObs table and
   reshape it into a (parameter="NObs", value, type="E") row. */
data nobs(keep=value type parameter);
	set nobs;
	if label ne "Number of Observations Read" then delete;
	type = "E";
	parameter = "NObs";
	rename NObsRead = value;
run;





/* Stack estimates, standard errors, R-square and NObs of regression 1 and give
   each parameter its row position n (the final table is sorted BY N TYPE).
   Parameters that are not listed keep a missing n. */
data Regression_1;
	set ParameterEstimates_e parameterEstimates_s FITSTATISTICS nobs;

	select (parameter);
		when ("EQ_ABOVE")                                          n = 1;
		when ("PIMPR_BELOW")                                       n = 2;
		when ("AUCTION_BELOW")                                     n = 3;
		when ("PCTSUM_CONTRACTS")                                  n = 4;
		when ("CONTRACT_IND")                                      n = 5;
		when ("ARBITRAGE")                                         n = 6;
		when ("QS_OPTIONSCALED", "QUOTEDSPREAD_C", "QuotedSpread_C") n = 7;
		when ("MULTIPLE_IND")                                      n = 8;
		when ("DMMP_IND")                                          n = 9;
		when ("GAMMA")                                             n = 10;
		when ("VEGA")                                              n = 11;
		when ("HC", "Abs_Delta", "ABS_DELTA")                      n = 12;
		when ("LOG_S_VOL")                                         n = 13;
		when ("STOCK_QS", "STOCK_QS_C")                            n = 14;
		when ("CALL_IND")                                          n = 15;
		when ("BUY_D")                                             n = 16;
		when ("LAG_LOG_DAILY_OPTION_VOL", "LAG_LOG_DAILY_OPTION")  n = 17;
		when ("TICK_CHANGE_IND")                                   n = 18;
		when ("LOG_DAYS_EXP")                                      n = 19;
		when ("INV_STOCK_MIDPOINT")                                n = 20;
		when ("INV_OPTION_MIDPOINT")                               n = 21;
		when ("LOG_MCAP")                                          n = 22;
		when ("Rsq")                                               n = 25;
		when ("NObs")                                              n = 26;
		otherwise;
	end;
run;

/* Attach the clustered p-value of each parameter. */
proc sql;
	create table Regression_1 as
	select a.*, b.Probt
	from Regression_1 a
		left join ParameterEstimates_p b
			on a.parameter = b.parameter;
quit;










/*
 * Regression 2 (PIMPR_BELOW), standard errors and p-values: PROC SURVEYREG on
 * the demeaned data INPUT_D2, no intercept. The parameter table is captured in
 * ParameterEstimates_clus.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table. It is now listed once.
 *
 * NOTE(review): with two variables on the CLUSTER statement SURVEYREG forms
 * clusters from the SYMBOL_ONLY x CHARDATE combinations; it does not cluster
 * two-way. Confirm this is the intended clustering.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		PIMPR_BELOW
		PCTSUM_CONTRACTS
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;
ODS RESULTS;





/* Standard errors of the SURVEYREG fit as (parameter, value, Type="S") rows.
   Rows whose parameter name contains one of the listed substrings are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type = "S";
	rename StdErr = Value;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;

/* p-values of the same fit as (parameter, probt) rows, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;






/*
 * Regression 2 (PIMPR_BELOW), point estimates: PROC GLM on INPUT with
 * SYMBOL_ONLY absorbed and CHARDATE as a class effect, no intercept. Captures
 * the parameter table, the fit statistics (R-square) and the observation counts.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table. It is now listed once.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=INPUT;
	ABSORB SYMBOL_ONLY;
	CLASS CHARDATE;
	model PFOF_IND =
		PIMPR_BELOW
		PCTSUM_CONTRACTS
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;






/* R-square of the GLM fit as a single (parameter="Rsq", value, Type="E") row.
   The six subsetting IFs that used to follow tested `parameter`, which had just
   been set to the constant "Rsq", so they could never exclude a row and have
   been removed. */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	rename RSquare=value;

	parameter = "Rsq";
	type = "E";
run;






/* Standard errors and p-values: straight copies of the SURVEYREG extracts. */
data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates of the GLM fit as (parameter, value, Type="E") rows.
   Rows whose parameter name contains one of the listed substrings (this
   includes the CHARDATE class levels) are dropped. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;




/* Keep only the "Number of Observations Read" line of the GLM NObs table and
   reshape it into a (parameter="NObs", value, type="E") row. */
data nobs(keep=value type parameter);
	set nobs;
	if label ne "Number of Observations Read" then delete;
	type = "E";
	parameter = "NObs";
	rename NObsRead = value;
run;





/* Stack estimates, standard errors, R-square and NObs of regression 2 and give
   each parameter its row position n (the final table is sorted BY N TYPE).
   Parameters that are not listed keep a missing n. */
data Regression_2;
	set ParameterEstimates_e parameterEstimates_s FITSTATISTICS nobs;

	select (parameter);
		when ("EQ_ABOVE")                                          n = 1;
		when ("PIMPR_BELOW")                                       n = 2;
		when ("AUCTION_BELOW")                                     n = 3;
		when ("PCTSUM_CONTRACTS")                                  n = 4;
		when ("CONTRACT_IND")                                      n = 5;
		when ("ARBITRAGE")                                         n = 6;
		when ("QS_OPTIONSCALED", "QUOTEDSPREAD_C", "QuotedSpread_C") n = 7;
		when ("MULTIPLE_IND")                                      n = 8;
		when ("DMMP_IND")                                          n = 9;
		when ("GAMMA")                                             n = 10;
		when ("VEGA")                                              n = 11;
		when ("HC", "Abs_Delta", "ABS_DELTA")                      n = 12;
		when ("LOG_S_VOL")                                         n = 13;
		when ("STOCK_QS", "STOCK_QS_C")                            n = 14;
		when ("CALL_IND")                                          n = 15;
		when ("BUY_D")                                             n = 16;
		when ("LAG_LOG_DAILY_OPTION_VOL", "LAG_LOG_DAILY_OPTION")  n = 17;
		when ("TICK_CHANGE_IND")                                   n = 18;
		when ("LOG_DAYS_EXP")                                      n = 19;
		when ("INV_STOCK_MIDPOINT")                                n = 20;
		when ("INV_OPTION_MIDPOINT")                               n = 21;
		when ("LOG_MCAP")                                          n = 22;
		when ("Rsq")                                               n = 25;
		when ("NObs")                                              n = 26;
		otherwise;
	end;
run;

/* Attach the clustered p-value of each parameter. */
proc sql;
	create table Regression_2 as
	select a.*, b.Probt
	from Regression_2 a
		left join ParameterEstimates_p b
			on a.parameter = b.parameter;
quit;








/*
 * Regression 3 (AUCTION_BELOW), standard errors and p-values: PROC SURVEYREG on
 * the demeaned data INPUT_D2, no intercept. The parameter table is captured in
 * ParameterEstimates_clus.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table. It is now listed once.
 *
 * NOTE(review): with two variables on the CLUSTER statement SURVEYREG forms
 * clusters from the SYMBOL_ONLY x CHARDATE combinations; it does not cluster
 * two-way. Confirm this is the intended clustering.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates_clus;
proc surveyreg data=INPUT_D2;
	cluster SYMBOL_ONLY CHARDATE;
	model PFOF_IND =
		AUCTION_BELOW
		PCTSUM_CONTRACTS
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		/ NOINT;
run;
ODS RESULTS;





/* Standard errors of the SURVEYREG fit as (parameter, value, Type="S") rows.
   Rows whose parameter name contains one of the listed substrings are dropped. */
data ParameterEstimates_s_clus(keep=parameter value Type);
	set ParameterEstimates_clus;
	Type = "S";
	rename StdErr = Value;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;

/* p-values of the same fit as (parameter, probt) rows, filtered the same way. */
data ParameterEstimates_p_clus(keep=parameter probt);
	set ParameterEstimates_clus;
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;









/*
 * Regression 3 (AUCTION_BELOW), point estimates: PROC GLM on INPUT with
 * SYMBOL_ONLY absorbed and CHARDATE as a class effect, no intercept. Captures
 * the parameter table, the fit statistics (R-square) and the observation counts.
 *
 * TICK_CHANGE_IND was listed twice in the MODEL statement; the repeat adds
 * nothing to the fit and risks a second, redundant TICK_CHANGE_IND row in the
 * parameter table. It is now listed once.
 */
ODS OUTPUT ParameterEstimates = ParameterEstimates;
ODS OUTPUT FitStatistics = FitStatistics;
ODS OUTPUT NObs = NObs;
proc glm namelen=64  data=INPUT;
	CLASS CHARDATE;
	ABSORB SYMBOL_ONLY;
	model PFOF_IND =
		AUCTION_BELOW
		PCTSUM_CONTRACTS
		Abs_Delta
		STOCK_QS
		INV_STOCK_MIDPOINT
		QUOTEDSPREAD_C
		GAMMA
		VEGA
		TICK_CHANGE_IND
		INV_OPTION_MIDPOINT
		CALL_IND
		LOG_DAYS_EXP
		LOG_S_VOL
		LOG_MCAP
		CHARDATE
		/ solution noint;
run;
quit;



/* R-square of the GLM fit as a single (parameter="Rsq", value, Type="E") row.
   The six subsetting IFs that used to follow tested `parameter`, which had just
   been set to the constant "Rsq", so they could never exclude a row and have
   been removed. */
data FitStatistics (keep=parameter value Type);
	set FitStatistics;

	rename RSquare=value;

	parameter = "Rsq";
	type = "E";
run;






/* Standard errors and p-values: straight copies of the SURVEYREG extracts. */
data ParameterEstimates_s(keep=parameter value Type);
	set ParameterEstimates_s_clus;
run;

data ParameterEstimates_p(keep=parameter probt);
	set ParameterEstimates_p_clus;
run;

/* Point estimates of the GLM fit as (parameter, value, Type="E") rows.
   Rows whose parameter name contains one of the listed substrings (this
   includes the CHARDATE class levels) are dropped. */
data ParameterEstimates_e(keep=parameter value Type);
	set ParameterEstimates;
	rename Estimate = value;
	Type = "E";
	if find(parameter, "SYMBOL_ONLY") or find(parameter, "CHARDATE")
	   or find(parameter, "sender") or find(parameter, "SENDER")
	   or find(parameter, "PFOF_Model") or find(parameter, "DIRECTED_OF")
	then delete;
	parameter = upcase(parameter);
run;




/* Keep only the "Number of Observations Read" line of the GLM NObs table and
   reshape it into a (parameter="NObs", value, type="E") row. */
data nobs(keep=value type parameter);
	set nobs;
	if label ne "Number of Observations Read" then delete;
	type = "E";
	parameter = "NObs";
	rename NObsRead = value;
run;




/* Stack estimates, standard errors, R-square and NObs of regression 3 and give
   each parameter its row position n (the final table is sorted BY N TYPE).
   Parameters that are not listed keep a missing n. */
data Regression_3;
	set ParameterEstimates_e parameterEstimates_s FITSTATISTICS nobs;

	select (parameter);
		when ("EQ_ABOVE")                                          n = 1;
		when ("PIMPR_BELOW")                                       n = 2;
		when ("AUCTION_BELOW")                                     n = 3;
		when ("PCTSUM_CONTRACTS")                                  n = 4;
		when ("CONTRACT_IND")                                      n = 5;
		when ("ARBITRAGE")                                         n = 6;
		when ("QS_OPTIONSCALED", "QUOTEDSPREAD_C", "QuotedSpread_C") n = 7;
		when ("MULTIPLE_IND")                                      n = 8;
		when ("DMMP_IND")                                          n = 9;
		when ("GAMMA")                                             n = 10;
		when ("VEGA")                                              n = 11;
		when ("HC", "Abs_Delta", "ABS_DELTA")                      n = 12;
		when ("LOG_S_VOL")                                         n = 13;
		when ("STOCK_QS", "STOCK_QS_C")                            n = 14;
		when ("CALL_IND")                                          n = 15;
		when ("BUY_D")                                             n = 16;
		when ("LAG_LOG_DAILY_OPTION_VOL", "LAG_LOG_DAILY_OPTION")  n = 17;
		when ("TICK_CHANGE_IND")                                   n = 18;
		when ("LOG_DAYS_EXP")                                      n = 19;
		when ("INV_STOCK_MIDPOINT")                                n = 20;
		when ("INV_OPTION_MIDPOINT")                               n = 21;
		when ("LOG_MCAP")                                          n = 22;
		when ("Rsq")                                               n = 25;
		when ("NObs")                                              n = 26;
		otherwise;
	end;
run;

/* Attach the clustered p-value of each parameter. */
proc sql;
	create table Regression_3 as
	select a.*, b.Probt
	from Regression_3 a
		left join ParameterEstimates_p b
			on a.parameter = b.parameter;
quit;















/* TEMP: the distinct (PARAMETER, TYPE, N) rows that occur in any of the three
   regressions; it is the row skeleton of the combined table. */
data TEMP;
	set REGRESSION_1
	    REGRESSION_2
	    REGRESSION_3;
run;

proc sort data=TEMP nodupkey;
	by PARAMETER TYPE N;
run;



/* Put the three regressions side by side: one row per (PARAMETER, TYPE) of
   TEMP, with the value, p-value, n, parameter and type of each regression in
   columns suffixed _R1, _R2 and _R3. */
proc sql;
	create table Regression_Combined as
	select
		G.PARAMETER as PARAMETER,
		G.TYPE      as TYPE,
		G.N         as N,
		A.VALUE as VALUE_R3, A.PROBT as PROBT_R3, A.N as N_R3,
			A.PARAMETER as PARAMETER_R3, A.TYPE as TYPE_R3,
		B.VALUE as VALUE_R2, B.PROBT as PROBT_R2, B.N as N_R2,
			B.PARAMETER as PARAMETER_R2, B.TYPE as TYPE_R2,
		D.VALUE as VALUE_R1, D.PROBT as PROBT_R1, D.N as N_R1,
			D.PARAMETER as PARAMETER_R1, D.TYPE as TYPE_R1
	from TEMP G
		left join REGRESSION_1 D
			on G.PARAMETER = D.PARAMETER and G.TYPE = D.TYPE
		left join REGRESSION_2 B
			on G.PARAMETER = B.PARAMETER and G.TYPE = B.TYPE
		left join REGRESSION_3 A
			on G.PARAMETER = A.PARAMETER and G.TYPE = A.TYPE;
quit;








/*
 * Turn the numeric results in Regression_Combined into printable table cells.
 *
 *   VARIABLE          row label (LaTeX markup); set on estimate rows (TYPE="E")
 *                     and on the RSq / NObs rows, blank on standard-error rows
 *   VALUE_CHAR_R1-R3  cell text of regressions 1-3:
 *                       estimate rows  - value with 3 decimals plus \threeS,
 *                                        \twoS or \oneS for p < 0.01, 0.05, 0.1
 *                       std-error rows - value with 3 decimals in parentheses
 *                       RSq row        - value with 3 decimals
 *                       NObs row       - integer count
 *   char_2dash        the LaTeX row terminator "\\"
 *
 * Changes from the previous version:
 *   - A missing p-value no longer earns three stars. In SAS a missing value
 *     compares as smaller than any number, so the old `PROBT < 0.01` test was
 *     true for missing p-values. Such estimates are now printed without stars.
 *   - An estimate with p exactly 0.1 used to fall through every branch and got
 *     an empty cell; it is now printed without stars.
 *   - NObs is printed as an integer instead of with three decimals.
 *   - The three copies of the cell-formatting code are replaced by one loop;
 *     the labels assigned twice (STOCK_QS, C_HC) keep the value that used to win.
 */
DATA Regression_Combined;
	SET Regression_Combined;

	/* Same lengths the variables previously received from their first assignment */
	LENGTH VARIABLE $48 char_2dash $12 VALUE_CHAR_R1 VALUE_CHAR_R2 $28 VALUE_CHAR_R3 $48;

	ARRAY _val{3} VALUE_R1 VALUE_R2 VALUE_R3;
	ARRAY _p{3}   PROBT_R1 PROBT_R2 PROBT_R3;
	ARRAY _typ{3} TYPE_R1 TYPE_R2 TYPE_R3;
	ARRAY _txt{3} VALUE_CHAR_R1 VALUE_CHAR_R2 VALUE_CHAR_R3;

	PARAMETER = compress(PARAMETER);

	/* ---- row labels: estimate rows only; standard-error rows stay blank ---- */
	IF TYPE="E" THEN DO;
		SELECT (PARAMETER);
			WHEN ("PIMPR_BELOW")         VARIABLE = "Price Improvement Below Average$_{t-1}$";
			WHEN ("EQ_ABOVE")            VARIABLE = "EQ Above Average$_{t-1}$";
			WHEN ("AUCTION_BELOW")       VARIABLE = "Auction Frequency Below Average$_{t-1}$";
			WHEN ("PCTSUM_CONTRACTS")    VARIABLE = "Contract Purchases (\%)";
			WHEN ("PFOF_IND")            VARIABLE = "PFOF";
			WHEN ("QUOTEDSPREAD_C")      VARIABLE = "Option Quoted Spread";
			WHEN ("DMMP_IND")            VARIABLE = "DMMP";
			WHEN ("VEGA")                VARIABLE = "Vega";
			WHEN ("C_HC")                VARIABLE = "Initial Hedge Cost";
			WHEN ("ABS_DELTA")           VARIABLE = "$\vert$Delta$\vert$";
			WHEN ("GAMMA")               VARIABLE = "Gamma";
			WHEN ("TICK_CHANGE_IND")     VARIABLE = "Tick Size";
			WHEN ("INV_OPTION_MIDPOINT") VARIABLE = "1/Option Midpoint";
			WHEN ("INV_STOCK_MIDPOINT")  VARIABLE = "1/Underlying Midpoint";
			WHEN ("CALL_IND")            VARIABLE = "Call";
			WHEN ("STOCK_QS")            VARIABLE = "Underlying Quoted Spread";
			WHEN ("LOG_S_VOL")           VARIABLE = "Trade size";
			WHEN ("CONTRACT_IND")        VARIABLE = "Contract size $\le$ 5";
			WHEN ("LOG_DAYS_EXP")        VARIABLE = "Days-to-Expiry";
			WHEN ("LOG_MCAP")            VARIABLE = "Underlying MCAP";
			OTHERWISE;
		END;
	END;

	char_2dash = "\\";

	IF PARAMETER = "Rsq"  THEN VARIABLE = "RSq";
	IF PARAMETER = "NObs" THEN VARIABLE = "NObs";

	/* ---- cell text, one pass per regression column ---- */
	DO _k = 1 TO 3;

		IF VARIABLE IN ("RSq","NObs","Avg. Inv Mills - N.AUC","Avg. Inv Mills - AUC") THEN DO;
			/* summary rows: no stars, no parentheses */
			IF VARIABLE = "NObs" THEN _txt{_k} = LEFT(PUT(_val{_k}, 12.));
			ELSE _txt{_k} = PUT(_val{_k}, 8.3);
		END;

		ELSE IF NOT MISSING(_val{_k}) THEN DO;
			IF TYPE = "S" THEN
				/* standard errors in parentheses */
				_txt{_k} = CATS("(", PUT(_val{_k}, 8.3), ")");
			ELSE IF _typ{_k} = "E" THEN DO;
				/* stars only for a p-value that is actually present */
				IF MISSING(_p{_k}) OR _p{_k} >= 0.1 THEN _txt{_k} = PUT(_val{_k}, 8.3);
				ELSE IF _p{_k} < 0.01 THEN _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\threeS");
				ELSE IF _p{_k} < 0.05 THEN _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\twoS");
				ELSE                       _txt{_k} = CATS(PUT(_val{_k}, 8.3), "\oneS");
			END;
		END;

	END;

	DROP _k;
RUN;



/* Table order: by row position, estimate ("E") before standard error ("S"). */
proc sort data=Regression_Combined;
	by N TYPE;
run;

/* Standard-error rows carry no row label. */
data Regression_Combined;
	set Regression_Combined;
	if TYPE = "S" then call missing(VARIABLE);
run;



/* Two extra table rows announcing the fixed effects. TYPE="Z" sorts them after
   the "E"/"S" rows that share the same N. */
data fixed_effects;
	do i = 1 to 2;
		if i = 1 then do;
			VARIABLE = PUT("Date FE",$48.);
			N = 22;
		end;
		else do;
			VARIABLE = PUT("Underlying FE",$48.);
			N = 24;
		end;
		TYPE = "Z";
		output;
	end;
run;


/* Append the fixed-effect rows and mark them "Yes" in all three columns. */
data Regression_Combined;
	set Regression_Combined fixed_effects;

	if VARIABLE in ("Date FE", "Underlying FE") then do;
		VALUE_CHAR_R1 = "Yes";
		VALUE_CHAR_R2 = "Yes";
		VALUE_CHAR_R3 = "Yes";
	end;
run;

/* Final row order of the table. */
proc sort data=Regression_Combined;
	by N TYPE;
run;

/*=========================================*/
/* Switch procedure output back on and print the table. The title used to
   contain a mis-encoded apostrophe ("DMMs" followed by a replacement
   character); it is now a plain ASCII apostrophe. */
ODS GRAPHICS OFF;
ODS RESULTS;
ODS SELECT ALL;
ODS TITLE "TABLE 9: PFOF LPM: DMMs' Execution Quality Comparison (Pseudo Data)";
FOOTNOTE "This table presents results from a linear probability model for auctions at exchanges where
 the designated market maker (DMM) pays payment for orderflow (PFOF) using Rule 606
 data and pseudo OPRA data";

/* One row per label; columns are the formatted cells of regressions 1-3. */
PROC REPORT data=Regression_Combined;
columns
variable 
value_char_R1
value_char_R2
value_char_R3
;

	DEFINE VARIABLE / DISPLAY "Variable" format =$168.;
	DEFINE value_char_R1  /DISPLAY "(2)" format =$168.;
	DEFINE value_char_R2  /DISPLAY "(3)" format =$168.;
	DEFINE value_char_R3  /DISPLAY "(4)" format =$168.;

run;
/*=========================================*/
/* Clear the title and footnote again */
FOOTNOTE " ";
ODS TITLE " ";


/* Route the log back to its default destination */
proc printto log=log;
run;






